***********************************************************************************************************
* This replication file estimates the market expansion and additional consumer surplus generated by entry *
***********************************************************************************************************

*************************
* 0 Set path to files *
*************************
clear all
capture log close
set more off
set trace off
mata: mata set matastrict off
cd $main_directory


******************************************************************************************************************************
* 1. Predict changes in output and surplus from addition based on a model with just 1 one product (i.e. Q=all transactions). *
******************************************************************************************************************************

*********************************************************************************************
* 1.1 Set predetermined values and calculate all variables necessary for demand prediction. *
*********************************************************************************************

clear all
sca scale=10
//Load the data and the commands used in the estimation of the spatial demand model
use "Data_Belgium\Generated_data\baseline_demand_dataset.dta"
qui do "Command_files\spatial_demand_Q.do"

//Load estimates from the demand model, as well as cost-price parameters:
mata: mata matuse "Estimates\betas_Q", replace
mata: mata matuse "Estimates\betas_Q1", replace
mata: mata matuse "Estimates\betas_Q2", replace
mata: mata matuse "Estimates\cost_price_parameters", replace
mata:    st_numscalar("p_Q1",p_Q1)
mata:    st_numscalar("p_Q2",p_Q2)
mata:    st_numscalar("alpha",alpha)
mata:    st_numscalar("nu",nu)
mata:    st_numscalar("aL",aL)
mata:    st_numscalar("phi",phi)
mata:    st_numscalar("aM",aM)
mata:    st_numscalar("fixed_cost",fixed_cost)
//Set predetermined values 
set seed 1234
global select_lambda `" 1"'
sca lambda=$select_lambda 

//Calculate the total demand on the market
qui su Q if common==1
sca sumQ=r(sum)
sca di sumQ
//Calculate the total population on the market
bysort market_ID:  gen dup_market = cond(_N==1,0,_n)
qui su population if dup_market<2
sca sumPopulation=r(sum)
sca di sumPopulation
//Calculate total potential demand per capita as 10 times larger than the current value.
scalar gamma=scale*sumQ/sumPopulation
sca di gamma
//Calculate the market size (i.e. number of possible transactions)
gen market_size=gamma*population

//Define the product for which demand will be estimated (in this case total transactions).
gen firm_demand=Q 
drop if firm_demand==.
drop if firm_demand<=0

//Generate a unique numeric identifier for the market.
sort market_ID
egen market_ID_num = group(market_ID)

// Controls: 
global controls_utility distance lnNotPerOff start_not LDE Perc_Young Perc_Elderly log_income popdens
global controls_total firm_ID firm_demand market_ID_num market_size $controls_utility

******************************************************************************************
* 1.2 Add notaries to existing offices and calculate counterfactual sales of every firm. *
******************************************************************************************
//Generate unique firm_IDs and store them in a vector named "firm_IDs" in mata.
preserve
sort firm_ID
collapse Q, by(firm_ID)
mata:	firm_IDs    = st_data(., "firm_ID") 
restore

//Generate unique market_IDs and store them in a vector named "market_IDs" in mata.
preserve
sort market_ID_num
collapse Q, by(market_ID)
mata:	market_IDs    = st_data(., "market_ID") 
restore

//Create an empty matrix in which to store the expected sales of each notary office in the counterfactuals.
//Each row i of "sales_Q_add" corresponds to the sales data if one notary is added to office i.
//The element i,j of "sales_Q_add" contains the sales of firm j if one notary is added to office i.

//First make an empty square matrix (n_firms x n_firms) in which to store the results
mata: length(market_IDs)
mata: sales_Q_add=J(length(firm_IDs),length(firm_IDs),.) 

//Go over each notary office, add one notary, calculate the sales for all offices under the counterfactual and repeat for the next office.
gen touse=. //This is a marker for which observations should be included (may be relevant if new locations can be added or some locations can be closed).
gen NotPerOff_counter=NotPerOff+1 //In the counterfactual, there will be one more notary in the office for which the counterfactual is being calculated.

levelsof firm_ID, local(firm_identifier) //Generate a local variable called firm_identifier, which contains the firm_IDs of all notaries. We will use this in the loop.
levelsof market_ID, local(market_identifier)  //Generate a local variable called market_identifier, which contains the market_IDs of all markets. We will use this in the loop.

foreach l of local firm_identifier { //Go over each firm to add a notary.
di `l' //Display the office ID in order to track how the code is progressing.
sca add_ID=`l' //Define add_ID as the identifier of the notary office in which one notary will be added.
qui replace touse=1 if NotPerOff>0 //Keep all observations in which there is at least one active notary. (This should be all offices for now.)
qui replace touse=1 if firm_ID==add_ID & NotPerOff_counter>0 //Keep the notary office if, after the addition, there is at least one notary in the office. (Since all offices start with at least one notary, this is superfluous, but it may be useful if you would like to add notaries to new locations.)
qui replace touse=0 if firm_ID!=add_ID & NotPerOff<1 //Drop notary offices with no notaries. (Again, for now this is superfluous.)
qui replace lnNotPerOff=log(NotPerOff) //Calculate the log of the notaries per office to use in the regression.
qui replace lnNotPerOff=log(NotPerOff_counter) if firm_ID==add_ID & NotPerOff_counter>0 //For the notary office in which a notary will be added, use the counterfactual variable to calculate the log.

//Calculate output using the counterfactual dataset.
sort market_ID
mata {
 add_ID=st_numscalar("add_ID") //Store the ID of the notary which will be added in Mata.
 X    = st_data(., "$controls_total","touse") //This selects the corresponding control variables.
 nx    = rows(X) //This calculates the number of observations (used to generate a unit vector for the constant).
 X    = X,J(nx,1,1) //This adds a constant to the control variables.
 Results=spdemand_log(X,betas_Q) //Results[,1] is the ID of the notary //Results[,2] is the log of the estimated demand per notary//Results[,3] is the log of the actual observed demand.
 n_firms=rows(Results) //The number of rows corresponds to the number of firms.
 st_numscalar("n_firms",n_firms) //Export the number of firms into the scalar n_firms.
	
	//The outer loop goes over all firms, to find the row in which the counterfactual output should be saved (the one where the row ID corresponds to the add ID)
	for(i=1; i<=length(firm_IDs); i++) { //i goes over the vector of firm IDs, to find the one where a notary was added.
 if (add_ID==firm_IDs[i]) { //Here we identify the row i, in which the ID of the firm is the same as the ID of the office where a notary was added.
 
 //The inner loop then goes over all firms and adds their counterfactual output.
	for(f=1; f<=length(firm_IDs); f++) { //f goes over all notary IDs
		for(j=1; j<=n_firms; j++) { //j goes over all notaries in the results to find the estimates for competitors and the firm itself.
	if (firm_IDs[f]==Results[j,1]) { //If the firm ID f, corresponds to the firm ID recorded in the results table, then add the sales in element i,f of the sales matrix.
	sales_Q_add[i,f]=round(exp(Results[j,2]),0.01) //The estimated log of sales is transformed. First, we take the exponent to get the actual sales and then we round to the second digit after the decimal point to save space.
	}
	}
  } //End of inner loop
  
 }
} //End of outer loop
} //End of mata commands
} //End of loop over all firms

mata: mata matsave "Data_Belgium\Generated_data\sales_Q_add" sales_Q_add, replace

qui replace lnNotPerOff=log(NotPerOff) //Make sure that no permanent changes are made.

**************************************************************************************************************************************************************************
* 1.3 Add notaries to existing offices and calculate counterfactual consumer surplus on the market: structure is similar to the one above. For comments, check point 1.2 *
**************************************************************************************************************************************************************************
clear all
use "Data_Belgium\Generated_data\baseline_demand_dataset.dta", clear
qui do "Command_files\spatial_demand_Q.do"

//Set predetermined values:
sca scale=10
mata: mata matuse "Estimates\betas_Q", replace
mata: mata matuse ".Estimates\betas_Q1", replace
mata: mata matuse "Estimates\betas_Q2", replace
mata: mata matuse "Estimates\cost_price_parameters", replace
mata:    st_numscalar("p_Q1",p_Q1)
mata:    st_numscalar("p_Q2",p_Q2)
mata:    st_numscalar("alpha",alpha)
mata:    st_numscalar("nu",nu)
mata:    st_numscalar("aL",aL)
mata:    st_numscalar("phi",phi)
mata:    st_numscalar("aM",aM)
mata:    st_numscalar("fixed_cost",fixed_cost)
//Set predetermined values 
set seed 1234
global select_lambda `" 1"'
sca lambda=$select_lambda 

//Calculate the total demand on the market
qui su Q if common==1
sca sumQ=r(sum)
sca di sumQ
//Calculate the total population on the market
bysort market_ID:  gen dup_market = cond(_N==1,0,_n)
qui su population if dup_market<2
sca sumPopulation=r(sum)
sca di sumPopulation
//Calculate total potential demand per capita as 10 times larger than the current value.
scalar gamma=scale*sumQ/sumPopulation
sca di gamma
//Calculate the market size (i.e. number of possible transactions)
gen market_size=gamma*population

//Define the product for which demand will be estimated (in this case total transactions).
gen firm_demand=Q 
drop if firm_demand==.
drop if firm_demand<=0

//Generate a unique numeric identifier for the market.
sort market_ID
egen market_ID_num = group(market_ID)

// Controls
global controls_utility distance lnNotPerOff start_not LDE Perc_Young Perc_Elderly log_income popdens
global controls_total firm_ID firm_demand market_ID_num market_size $controls_utility

levelsof firm_ID, local(firm_identifier) //Generate a local variable called firm_identifier, which contains the firm_IDs of all notaries. We will use this in the loop.

//Generate unique firm_IDs
preserve
sort firm_ID
collapse Q, by(firm_ID)
mata:	firm_IDs    = st_data(., "firm_ID") 
restore

//Generate unique market_IDs
preserve
sort market_ID
collapse Q, by(market_ID)
mata:	market_IDs    = st_data(., "market_ID") 
restore

//Make a matrix in which to store the CS change due to the addition of a notary in each of the firms.
mata: CS_total_add=J(length(firm_IDs),1,.)
gen touse=.
gen NotPerOff_counter=NotPerOff+1
foreach l of local firm_identifier {
di `l'
sca add_ID=`l'
qui replace touse=1 if firm_ID!=add_ID
qui replace touse=0 if firm_ID==add_ID & NotPerOff_counter<1 //Makes no sense for addition, but still.
qui replace touse=1 if firm_ID==add_ID & NotPerOff_counter>0
qui replace lnNotPerOff=log(NotPerOff)
qui replace lnNotPerOff=log(NotPerOff_counter) if firm_ID==add_ID & NotPerOff_counter>0

//Surplus calculation
sort market_ID
mata {
 add_ID=st_numscalar("add_ID")
 X    = st_data(., "$controls_total","touse") //This selects the corresponding control variables.
    nx    = rows(X)
    X    = X,J(nx,1,1)
	CS_matrix=consumer_surplus(X,betas_Q,transportation_cost) //This matrix contains the market_ID in the first column, and the consumer surplus in the market in the second column.
    CS_total=sum(CS_matrix[,2]) //The total consumer surplus is the sum over all markets.
	for(i=1; i<=length(firm_IDs); i++) { //i goes over the firm combinations to check which notary was added
 if (add_ID==firm_IDs[i]) {
	CS_total_add[i,1]=CS_total
	}
	}
  }
 }
mata: mata matsave "Data_Belgium\Generated_data\CS_total_add" CS_total_add, replace
qui replace lnNotPerOff=log(NotPerOff)


**********************************************************************************************
* 2. Predict changes in output and surplus from addition for real-estate products (i.e. Q1). *
**********************************************************************************************

*********************************************************************************************
* 2.1 Set predetermined values and calculate all variables necessary for demand prediction. *
*********************************************************************************************
clear all
use "Data_Belgium\Generated_data\baseline_demand_dataset.dta", clear
qui do "Command_files\spatial_demand_Q.do"

//Set predetermined values:
sca scale=10
mata: mata matuse "Estimates\betas_Q", replace
mata: mata matuse "Estimates\betas_Q1", replace
mata: mata matuse "Estimates\betas_Q2", replace
mata: mata matuse "Estimates\cost_price_parameters", replace

mata:    st_numscalar("p_Q1",p_Q1)
mata:    st_numscalar("p_Q2",p_Q2)
mata:    st_numscalar("alpha",alpha)
mata:    st_numscalar("nu",nu)
mata:    st_numscalar("aL",aL)
mata:    st_numscalar("phi",phi)
mata:    st_numscalar("aM",aM)
mata:    st_numscalar("fixed_cost",fixed_cost)
//Set predetermined values 
set seed 1234
global select_lambda `" 1"'
sca lambda=$select_lambda 

//Calculate the total demand on the market
qui su Q1 if common==1
sca sumQ1=r(sum)
sca di sumQ1
//Calculate the total population on the market
bysort market_ID:  gen dup_market = cond(_N==1,0,_n)
qui su population if dup_market<2
sca sumPopulation=r(sum)
sca di sumPopulation
//Calculate total potential demand per capita as 10 times larger than the current value.
scalar gamma=scale*sumQ1/sumPopulation
sca di gamma
//Calculate the market size (i.e. number of possible transactions)
gen market_size=gamma*population

//Define the product for which demand will be estimated (in this case real-estate transactions).
gen firm_demand=Q1
drop if firm_demand==.
drop if firm_demand<=0

//Generate a unique numeric identifier for the market.
sort market_ID
egen market_ID_num = group(market_ID)

// Controls
global controls_utility distance lnNotPerOff start_not LDE Perc_Young Perc_Elderly log_income popdens
global controls_total firm_ID firm_demand market_ID_num market_size $controls_utility

******************************************************************************************
* 2.2 Add notaries to existing offices and calculate counterfactual sales of every firm. *
******************************************************************************************
//Generate unique firm_IDs and store them in a vector named "firm_IDs" in mata.
preserve
sort firm_ID
collapse Q1, by(firm_ID)
mata:	firm_IDs    = st_data(., "firm_ID") 
restore

//Generate unique market_IDs and store them in a vector named "market_IDs" in mata.
preserve
sort market_ID_num
collapse Q1, by(market_ID)
mata:	market_IDs    = st_data(., "market_ID") 
restore

//Create an empty matrix in which to store the expected sales of each notary office in the counterfactuals.
//Each row i of "sales_Q1_add" corresponds to the sales data if one notary is added to office i.
//The element i,j of "sales_Q1_add" contains the sales of firm j if one notary is added to office i.

//First make an empty square matrix (n_firms x n_firms) in which to store the results
mata: length(market_IDs)
mata: sales_Q1_add=J(length(firm_IDs),length(firm_IDs),.) 

//Go over each notary office, add one notary, calculate the sales for all offices under the counterfactual and repeat for the next office.
gen touse=. //This is a marker for which observations should be included (may be relevant if new locations can be added or some locations can be closed).
gen NotPerOff_counter=NotPerOff+1 //In the counterfactual, there will be one more notary in the office for which the counterfactual is being calculated.

levelsof firm_ID, local(firm_identifier) //Generate a local variable called firm_identifier, which contains the firm_IDs of all notaries. We will use this in the loop.
levelsof market_ID, local(market_identifier)  //Generate a local variable called market_identifier, which contains the market_IDs of all markets. We will use this in the loop.

foreach l of local firm_identifier { //Go over each firm to add a notary.
di `l' //Display the office ID in order to track how the code is progressing.
sca add_ID=`l' //Define add_ID as the identifier of the notary office in which one notary will be added.
qui replace touse=1 if NotPerOff>0 //Keep all observations in which there is at least one active notary. (This should be all offices for now.)
qui replace touse=1 if firm_ID==add_ID & NotPerOff_counter>0 //Keep the notary office if, after the addition, there is at least one notary in the office. (Since all offices start with at least one notary, this is superfluous, but it may be useful if you would like to add notaries to new locations.)
qui replace touse=0 if firm_ID!=add_ID & NotPerOff<1 //Drop notary offices with no notaries. (Again, for now this is superfluous.)
qui replace lnNotPerOff=log(NotPerOff) //Calculate the log of the notaries per office to use in the regression.
qui replace lnNotPerOff=log(NotPerOff_counter) if firm_ID==add_ID & NotPerOff_counter>0 //For the notary office in which a notary will be added, use the counterfactual variable to calculate the log.

//Calculate output using the counterfactual dataset.
sort market_ID
mata {
 add_ID=st_numscalar("add_ID") //Store the ID of the notary which will be added in Mata.
 X    = st_data(., "$controls_total","touse") //This selects the corresponding control variables.
 nx    = rows(X) //This calculates the number of observations (used to generate a unit vector for the constant).
 X    = X,J(nx,1,1) //This adds a constant to the control variables.
 Results=spdemand_log(X,betas_Q1) //Results[,1] is the ID of the notary //Results[,2] is the log of the estimated demand per notary//Results[,3] is the log of the actual observed demand.
 n_firms=rows(Results) //The number of rows corresponds to the number of firms.
 st_numscalar("n_firms",n_firms) //Export the number of firms into the scalar n_firms.
	
	//The outer loop goes over all firms, to find the row in which the counterfactual output should be saved (the one where the row ID corresponds to the add ID)
	for(i=1; i<=length(firm_IDs); i++) { //i goes over the vector of firm IDs, to find the one where a notary was added.
 if (add_ID==firm_IDs[i]) { //Here we identify the row i, in which the ID of the firm is the same as the ID of the office where a notary was added.
 
 //The inner loop then goes over all firms and adds their counterfactual output.
	for(f=1; f<=length(firm_IDs); f++) { //f goes over all notary IDs
		for(j=1; j<=n_firms; j++) { //j goes over all notaries in the results to find the estimates for competitors and the firm itself (note that j can be smaller than f, if an office had to close).
	if (firm_IDs[f]==Results[j,1]) { //If the firm ID f, corresponds to the firm ID recorded in the results table, then add the sales in element i,f of the sales matrix.
	sales_Q1_add[i,f]=round(exp(Results[j,2]),0.01) //The estimated log of sales is transformed. First, we take the exponent to get the actual sales and then we round to the second digit after the decimal point to save space.
	}
	}
  } //End of inner loop
  
 }
} //End of outer loop
} //End of mata commands
} //End of loop over all firms

mata: mata matsave "Data_Belgium\Generated_data\sales_Q1_add" sales_Q1_add, replace

qui replace lnNotPerOff=log(NotPerOff) //Make sure that no permanent changes are made.

**************************************************************************************************************************************************************************
* 2.3 Add notaries to existing offices and calculate counterfactual consumer surplus on the market: structure is similar to the one above. For comments, check point 1.2 *
**************************************************************************************************************************************************************************
clear all
use "Data_Belgium\Generated_data\baseline_demand_dataset.dta", clear
qui do "Command_files\spatial_demand_Q.do"

//Set predetermined values:
sca scale=10
mata: mata matuse "Estimates\betas_Q", replace
mata: mata matuse "Estimates\betas_Q1", replace
mata: mata matuse "Estimates\betas_Q2", replace
mata: mata matuse "Estimates\cost_price_parameters", replace
mata:    st_numscalar("p_Q1",p_Q1)
mata:    st_numscalar("p_Q2",p_Q2)
mata:    st_numscalar("alpha",alpha)
mata:    st_numscalar("nu",nu)
mata:    st_numscalar("aL",aL)
mata:    st_numscalar("phi",phi)
mata:    st_numscalar("aM",aM)
mata:    st_numscalar("fixed_cost",fixed_cost)
//Set predetermined values 
set seed 1234
global select_lambda `" 1"'
sca lambda=$select_lambda 

//Calculate the total demand on the market
qui su Q1 if common==1
sca sumQ1=r(sum)
sca di sumQ1
//Calculate the total population on the market
bysort market_ID:  gen dup_market = cond(_N==1,0,_n)
qui su population if dup_market<2
sca sumPopulation=r(sum)
sca di sumPopulation
//Calculate total potential demand per capita as 10 times larger than the current value.
scalar gamma=scale*sumQ1/sumPopulation
sca di gamma
//Calculate the market size (i.e. number of possible transactions)
gen market_size=gamma*population

//Define the product for which demand will be estimated (in this case total transactions).
gen firm_demand=Q1 
drop if firm_demand==.
drop if firm_demand<=0

//Generate a unique numeric identifier for the market.
sort market_ID
egen market_ID_num = group(market_ID)

// Controls
global controls_utility distance lnNotPerOff start_not LDE Perc_Young Perc_Elderly log_income popdens
global controls_total firm_ID firm_demand market_ID_num market_size $controls_utility

levelsof firm_ID, local(firm_identifier) //Generate a local variable called firm_identifier, which contains the firm_IDs of all notaries. We will use this in the loop.

//Generate unique firm_IDs
preserve
sort firm_ID
collapse Q1, by(firm_ID)
mata:	firm_IDs    = st_data(., "firm_ID") 
restore

//Generate unique market_IDs
preserve
sort market_ID
collapse Q1, by(market_ID)
mata:	market_IDs    = st_data(., "market_ID") 
restore

//Make a matrix in which to store the CS change due to the addition of a notary in each of the firms.
mata: CS_total_Q1_add=J(length(firm_IDs),1,.)
gen touse=.
gen NotPerOff_counter=NotPerOff+1
foreach l of local firm_identifier {
di `l'
sca add_ID=`l'
qui replace touse=1 if firm_ID!=add_ID
qui replace touse=0 if firm_ID==add_ID & NotPerOff_counter<1 
qui replace touse=1 if firm_ID==add_ID & NotPerOff_counter>0
qui replace lnNotPerOff=log(NotPerOff)
qui replace lnNotPerOff=log(NotPerOff_counter) if firm_ID==add_ID & NotPerOff_counter>0

//Surplus calculation
sort market_ID
mata {
 add_ID=st_numscalar("add_ID")
 X    = st_data(., "$controls_total","touse") //This selects the corresponding control variables.
    nx    = rows(X)
    X    = X,J(nx,1,1)
	CS_matrix=consumer_surplus(X,betas_Q1,transportation_cost)
    CS_total=sum(CS_matrix[,2])
	for(i=1; i<=length(firm_IDs); i++) { //i goes over the firm combinations to check which notary was added
 if (add_ID==firm_IDs[i]) {
	CS_total_Q1_add[i,1]=CS_total
	}
	}
  }
 }
mata: mata matsave "Data_Belgium\Generated_data\CS_total_Q1_add" CS_total_Q1_add, replace
qui replace lnNotPerOff=log(NotPerOff)


********************************************************************************************
* 3. Predict changes in output and surplus from addition for other transactions (i.e. Q2). *
********************************************************************************************

*********************************************************************************************
* 3.1 Set predetermined values and calculate all variables necessary for demand prediction. *
*********************************************************************************************
clear all
use "Data_Belgium\Generated_data\baseline_demand_dataset.dta", clear
qui do "Command_files\spatial_demand_Q.do"

//Set predetermined values:
sca scale=10
mata: mata matuse "Estimates\betas_Q", replace
mata: mata matuse "Estimates\betas_Q1", replace
mata: mata matuse "Estimates\betas_Q2", replace
mata: mata matuse "Estimates\cost_price_parameters", replace
mata:    st_numscalar("p_Q1",p_Q1)
mata:    st_numscalar("p_Q2",p_Q2)
mata:    st_numscalar("alpha",alpha)
mata:    st_numscalar("nu",nu)
mata:    st_numscalar("aL",aL)
mata:    st_numscalar("phi",phi)
mata:    st_numscalar("aM",aM)
mata:    st_numscalar("fixed_cost",fixed_cost)
//Set predetermined values 
set seed 1234
global select_lambda `" 1"'
sca lambda=$select_lambda 

//Calculate the total demand on the market
qui su Q2 if common==1
sca sumQ2=r(sum)
sca di sumQ2
//Calculate the total population on the market
bysort market_ID:  gen dup_market = cond(_N==1,0,_n)
qui su population if dup_market<2
sca sumPopulation=r(sum)
sca di sumPopulation
//Calculate total potential demand per capita as 10 times larger than the current value.
scalar gamma=scale*sumQ2/sumPopulation
sca di gamma
//Calculate the market size (i.e. number of possible transactions)
gen market_size=gamma*population

//Define the product for which demand will be estimated (in this case total transactions).
gen firm_demand=Q2 
drop if firm_demand==.
drop if firm_demand<=0

//Generate a unique numeric identifier for the market.
sort market_ID
egen market_ID_num = group(market_ID)

// Controls
global controls_utility distance lnNotPerOff start_not LDE Perc_Young Perc_Elderly log_income popdens
global controls_total firm_ID firm_demand market_ID_num market_size $controls_utility

******************************************************************************************
* 3.2 Add notaries to existing offices and calculate counterfactual sales of every firm. *
******************************************************************************************
//Generate unique firm_IDs and store them in a vector named "firm_IDs" in mata.
preserve
sort firm_ID
collapse Q2, by(firm_ID)
mata:	firm_IDs    = st_data(., "firm_ID") 
restore

//Generate unique market_IDs and store them in a vector named "market_IDs" in mata.
preserve
sort market_ID_num
collapse Q2, by(market_ID)
mata:	market_IDs    = st_data(., "market_ID") 
restore

//Create an empty matrix in which to store the expected sales of each notary office in the counterfactuals.
//Each row i of "sales_Q2_add" corresponds to the sales data if one notary is added to office i.
//The element i,j of "sales_Q2_add" contains the sales of firm j if one notary is added to office i.

//First make an empty square matrix (n_firms x n_firms) in which to store the results
mata: length(market_IDs)
mata: sales_Q2_add=J(length(firm_IDs),length(firm_IDs),.) 

//Go over each notary office, add one notary, calculate the sales for all offices under the counterfactual and repeat for the next office.
gen touse=. //This is a marker for which observations should be included (may be relevant if new locations can be added or some locations can be closed).
gen NotPerOff_counter=NotPerOff+1 //In the counterfactual, there will be one more notary in the office for which the counterfactual is being calculated.

levelsof firm_ID, local(firm_identifier) //Generate a local variable called firm_identifier, which contains the firm_IDs of all notaries. We will use this in the loop.
levelsof market_ID, local(market_identifier)  //Generate a local variable called market_identifier, which contains the market_IDs of all markets. We will use this in the loop.

foreach l of local firm_identifier { //Go over each firm to add a notary.
di `l' //Display the office ID in order to track how the code is progressing.
sca add_ID=`l' //Define add_ID as the identifier of the notary office in which one notary will be added.
qui replace touse=1 if NotPerOff>0 //Keep all observations in which there is at least one active notary. (This should be all offices for now.)
qui replace touse=1 if firm_ID==add_ID & NotPerOff_counter>0 //Keep the notary office if, after the addition, there is at least one notary in the office. (Since all offices start with at least one notary, this is superfluous, but it may be useful if you want to add notaries to new locations.)
qui replace touse=0 if firm_ID!=add_ID & NotPerOff<1 //Drop notary offices with no notaries. (Again, for now this is superfluous.)
qui replace lnNotPerOff=log(NotPerOff) //Calculate the log of the notaries per office to use in the regression.
qui replace lnNotPerOff=log(NotPerOff_counter) if firm_ID==add_ID & NotPerOff_counter>0 //For the notary office in which a notary will be added, use the counterfactual variable to calculate the log.

//Calculate output using the counterfactual dataset.
sort market_ID
mata {
 add_ID=st_numscalar("add_ID") //Store the ID of the notary which will be removed in Mata.
 X    = st_data(., "$controls_total","touse") //This selects the corresponding control variables.
 nx    = rows(X) //This calculates the number of observations (used to generate a unit vector for the constant).
 X    = X,J(nx,1,1) //This adds a constant to the control variables.
 Results=spdemand_log(X,betas_Q2) //Results[,1] is the ID of the notary //Results[,2] is the log of the estimated demand per notary//Results[,3] is the log of the actual observed demand.
 n_firms=rows(Results) //The number of rows corresponds to the number of firms.
 st_numscalar("n_firms",n_firms) //Export the number of firms into the scalar n_firms.
	
	//The outer loop goes over all firms, to find the row in which the counterfactual output should be saved (the one where the row ID corresponds to the add ID)
	for(i=1; i<=length(firm_IDs); i++) { //i goes over the vector of firm IDs, to find the one where a notary was added.
 if (add_ID==firm_IDs[i]) { //Here we identify the row i, in which the ID of the firm is the same as the ID of the office where a notary was added.
 
 //The inner loop then goes over all firms and adds their counterfactual output.
	for(f=1; f<=length(firm_IDs); f++) { //f goes over all notary IDs
		for(j=1; j<=n_firms; j++) { //j goes over all notaries in the results to find the estimates for competitors and the firm itself.
	if (firm_IDs[f]==Results[j,1]) { //If the firm ID f, corresponds to the firm ID recorded in the results table, then add the sales in element i,f of the sales matrix.
	sales_Q2_add[i,f]=round(exp(Results[j,2]),0.01) //The estimated log of sales is transformed. First, we take the exponent to get the actual sales and then we round to the second digit after the decimal point to save space.
	}
	}
  } //End of inner loop
  
 }
} //End of outer loop
} //End of mata commands
} //End of loop over all firms

mata: mata matsave "Data_Belgium\Generated_data\sales_Q2_add" sales_Q2_add, replace

qui replace lnNotPerOff=log(NotPerOff) //Make sure that no permanent changes are made.

**************************************************************************************************************************************************************************
* 3.3 Add notaries to existing offices and calculate counterfactual consumer surplus on the market: structure is similar to the one above. For comments, check point 1.2 *
**************************************************************************************************************************************************************************
clear all
use "Data_Belgium\Generated_data\baseline_demand_dataset.dta", clear
qui do "Command_files\spatial_demand_Q.do"

//Set predetermined values:
sca scale=10
mata: mata matuse "Estimates\betas_Q", replace
mata: mata matuse "Estimates\betas_Q1", replace
mata: mata matuse "Estimates\betas_Q2", replace
mata: mata matuse "Estimates\cost_price_parameters", replace
mata:    st_numscalar("p_Q1",p_Q1)
mata:    st_numscalar("p_Q2",p_Q2)
mata:    st_numscalar("alpha",alpha)
mata:    st_numscalar("nu",nu)
mata:    st_numscalar("aL",aL)
mata:    st_numscalar("phi",phi)
mata:    st_numscalar("aM",aM)
mata:    st_numscalar("fixed_cost",fixed_cost)
//Set predetermined values 
set seed 1234
global select_lambda `" 1"'
sca lambda=$select_lambda 

//Calculate the total demand on the market
qui su Q2 if common==1
sca sumQ2=r(sum)
sca di sumQ2
//Calculate the total population on the market
bysort market_ID:  gen dup_market = cond(_N==1,0,_n)
qui su population if dup_market<2
sca sumPopulation=r(sum)
sca di sumPopulation
//Calculate total potential demand per capita as 10 times larger than the current value.
scalar gamma=scale*sumQ2/sumPopulation
sca di gamma
//Calculate the market size (i.e. number of possible transactions)
gen market_size=gamma*population

//Define the product for which demand will be estimated (in this case total transactions).
gen firm_demand=Q2 
drop if firm_demand==.
drop if firm_demand<=0

//Generate a unique numeric identifier for the market.
sort market_ID
egen market_ID_num = group(market_ID)

// Controls
global controls_utility distance lnNotPerOff start_not LDE Perc_Young Perc_Elderly log_income popdens
global controls_total firm_ID firm_demand market_ID_num market_size $controls_utility

levelsof firm_ID, local(firm_identifier) //Generate a local variable called firm_identifier, which contains the firm_IDs of all notaries. We will use this in the loop.

//Generate unique firm_IDs
preserve
sort firm_ID
collapse Q2, by(firm_ID)
mata:	firm_IDs    = st_data(., "firm_ID") 
restore

//Generate unique market_IDs
preserve
sort market_ID
collapse Q2, by(market_ID)
mata:	market_IDs    = st_data(., "market_ID") 
restore

//Make a matrix in which to store the CS change due to the addition of a notary in each of the firms.
mata: CS_total_Q2_add=J(length(firm_IDs),1,.)
gen touse=.
gen NotPerOff_counter=NotPerOff+1
foreach l of local firm_identifier {
di `l'
sca add_ID=`l'
qui replace touse=1 if firm_ID!=add_ID
qui replace touse=0 if firm_ID==add_ID & NotPerOff_counter<1 
qui replace touse=1 if firm_ID==add_ID & NotPerOff_counter>0
qui replace lnNotPerOff=log(NotPerOff)
qui replace lnNotPerOff=log(NotPerOff_counter) if firm_ID==add_ID & NotPerOff_counter>0

//Surplus calculation
sort market_ID
mata {
 add_ID=st_numscalar("add_ID")
 X    = st_data(., "$controls_total","touse") //This selects the corresponding control variables.
    nx    = rows(X)
    X    = X,J(nx,1,1)
	CS_matrix=consumer_surplus(X,betas_Q2,transportation_cost)
    CS_total=sum(CS_matrix[,2])
	for(i=1; i<=length(firm_IDs); i++) { //i goes over the firm combinations to check which notary was added
 if (add_ID==firm_IDs[i]) {
	CS_total_Q2_add[i,1]=CS_total
	}
	}
  }
 }
mata: mata matsave "Data_Belgium\Generated_data\CS_total_Q2_add" CS_total_Q2_add, replace
qui replace lnNotPerOff=log(NotPerOff)
